Sys.setlocale("LC_ALL","English")
## [1] "LC_COLLATE=English_United States.1252;LC_CTYPE=English_United States.1252;LC_MONETARY=English_United States.1252;LC_NUMERIC=C;LC_TIME=English_United States.1252"
library(tidyverse)
library(sf)
library(furrr)
library(lubridate)
library(rnaturalearth)
library(igraph)
read_hotspot <- function(directory = "data/Himawari-8", from = '20190501', to = '20190701'){
  ### include from, exclude to
  
  fls = list.files(directory, full.names = TRUE)
  fls_short = list.files("data/Himawari-8", full.names = FALSE)
  fls_filter = (fls_short > paste('H08', from, sep = '_')) & (fls_short < paste('H08', to, sep = '_'))
  fls = fls[fls_filter]
  all = map(fls, read_csv, quote="'")
  d = bind_rows(all)
  return(d)
  
}
hotspots = read_hotspot(from = "20191001", to ="20200331")
# filter au
hotspots = hotspots %>%
  filter(between(lon, 112, 155)) %>% 
  filter(between(lat, -44, -10))
au_map = ne_states(country = 'Australia', returnclass = 'sf') #get au map

hotspots = hotspots %>%
  filter(firepower > 100)
hotspots = st_as_sf(x = hotspots, coords = c('lon','lat'))
st_crs(hotspots) = 4326
tab = st_intersects(au_map$geometry, hotspots$geometry)
## although coordinates are longitude/latitude, st_intersects assumes that they are planar
hotspots$state = ''
for (i in seq(1,nrow(au_map))){
  
  hotspots$state[tab[[i]]] = au_map$name[i]
  
}
unidentified_point = hotspots[hotspots$state == '',]
tab2 = st_nearest_feature(unidentified_point$geometry, au_map$geometry)
hotspots$state[hotspots$state == ''] = au_map$name[tab2]
hotspots$year = year(hotspots$`#obstime`)
hotspots$month = month(hotspots$`#obstime`)
hotspots$day = day(hotspots$`#obstime`)
hotspots$week = week(hotspots$`#obstime`)
hotspots$hour = hour(hotspots$`#obstime`)
# filter VIC
hotspots_VIC = hotspots %>%
  filter(state == "Victoria")
rm(hotspots)
ggplot(hotspots_VIC) +
  geom_boxplot(aes(x = paste(year,month,sep = '-'), y = firepower, group = month.abb[month])) +
  xlab('Date') +
  ggtitle('VIC from Oct')

t = hotspots_VIC %>%
  group_by(year,month,day) %>%
  summarise(cases = n(), total_firepower = sum(firepower))
ggplot(t) +
  geom_line(aes(ymd(as.character(year * 10000 + month*100 + day)), log(cases), col  ='log(cases)')) +
  geom_line(aes(ymd(as.character(year * 10000 + month*100 + day)), log(total_firepower/cases), col='log(firepower_per_case)')) +
  xlab('Date') +
  ggtitle('Compare cases with firepower per case by day in log scale')

ggplot(t) +
  geom_line(aes(ymd(as.character(year * 10000 + month*100 + day)), log(cases)/sd(log(cases)), col  ='log(cases)/sd')) +
  geom_line(aes(ymd(as.character(year * 10000 + month*100 + day)), log(total_firepower/cases) / sd(log(total_firepower/cases)), col='log(firepower_per_case)/sd')) +
  xlab('Date') +
  ggtitle('Compare cases with firepower per case by day in log scale')

ggplot(t) +
  geom_point(aes(log(cases), log(total_firepower/cases))) +
  geom_smooth(aes(log(cases), log(total_firepower/cases)), method = 'lm')

# assign hour index for data
hotspots_VIC$hour_id = hotspots_VIC %>%
  group_indices(year,month,day,hour)

hotspots_VIC$obs_id = 1:nrow(hotspots_VIC)
hotspots_VIC$fire_id = NA
first_hour = filter(hotspots_VIC, hour_id == 1) %>%
  select(obs_id, year, month, day, hour_id, geometry)
# calculate dist matrix of first hour

dist_matrix = matrix(as.numeric(st_distance(first_hour$geometry, first_hour$geometry)),ncol =nrow(first_hour))

# if the distance between two points is less than 3 km, create an edge between them
neighbors = (dist_matrix<3000) 

# create graph by adjacency matrix
first_graph = graph.adjacency(neighbors, mode = 'undirected')

# compute clusters
cluster_result = clusters(first_graph)

# assign membership to each point
first_hour$fire_id = cluster_result$membership

# record centersfor each hour
group_centers = list()

# compute the centroid of each group
group_centers[[1]] = as.data.frame(st_coordinates(first_hour)) %>%
  mutate(fire_id = first_hour$fire_id) %>%
  group_by(fire_id) %>%
  summarise(lon = mean(X), lat = mean(Y))

group_centers[[1]] = st_as_sf(x = group_centers[[1]], coords = c("lon","lat"), crs = 4326)
group_centers[[1]]$active = 0

# assign the fire id back to the master data set
hotspots_VIC$fire_id[match(first_hour$obs_id, hotspots_VIC$obs_id)] = first_hour$fire_id
plot(first_graph)

ggplot()+
  geom_sf(data = first_hour, aes(shape = factor(fire_id))) +
  geom_sf(data = group_centers[[1]], col = 'red', size =2)

for (i in seq(2,200)){
  
  # copy the centers info from last hour
  group_centers[[i]] = group_centers[[i-1]]
  # let the active minus 1
  group_centers[[i]]$active = group_centers[[i]]$active - 1
  
  
  # get the current hour hotspots data
  current_hour = filter(hotspots_VIC, hour_id == i) %>%
  select(obs_id, geometry)
  
  # get the active fire center
  active_group = filter(group_centers[[i]], active > -24)
  
  # append the center geometry to the hotspots geometry vector
  if (nrow(active_group)>0){
    geom_info = c(current_hour$geometry, active_group$geometry)
  } else {
    geom_info = current_hour$geometry
  }
  
  
  # compute the dist matrix
  dist_matrix = matrix(as.numeric(st_distance(geom_info, geom_info)),ncol =nrow(current_hour)+nrow(active_group))
  
  # connect two points if they have less than 3000m distance
  neighbors = (dist_matrix<3000) 
  
  # use the adjancency to create a graph
  current_graph = graph.adjacency(neighbors, mode = 'undirected')
  
  # find the components in the graph
  cluster_result = clusters(current_graph)
  
  # get the membership for the hotspots
  current_fire_id = cluster_result$membership[1:nrow(current_hour)]
  
  current_hour$fire_id = NA
  
  # get the membership for the active centers
  if (nrow(active_group)>0){
    previous_center_current_id = cluster_result$membership[(nrow(current_hour)+1):length(cluster_result$membership)]
    
    # if the hotspots share the same group as active centers, assign those hotspots a previous fire_id
    current_hour$fire_id =  active_group$fire_id[match(current_fire_id, previous_center_current_id)]
  }
  
  # hotspots that doesn't in the same group as active centers will have a missing value, we need to assign them new fire id
  
  # we first get the membership of those points
  temp_membership = current_fire_id[is.na(current_hour$fire_id)]
  
  # we need to slightly adjust the membership to make it start from 1 and common difference equal to 1
  temp_membership = data.frame(temp_membership = temp_membership) %>%
    group_indices(temp_membership)
  # and add the total number of fire we knew
  temp_membership = temp_membership + nrow(group_centers[[i]])

    
  # assign them back to those missing entries
  current_hour$fire_id[is.na(current_hour$fire_id)] = temp_membership

  
  # now all the current hour points has a fire id, we can recompute the centers
  current_centers = as.data.frame(st_coordinates(current_hour)) %>%
  mutate(fire_id = current_hour$fire_id) %>%
  group_by(fire_id) %>%
  summarise(lon = mean(X), lat = mean(Y))
  
  
  current_centers$active = 0
  current_centers = st_as_sf(x = current_centers, coords = c("lon","lat"), crs = 4326)
  
  # replace the center info
  group_centers[[i]] = group_centers[[i]] %>%
    filter(!(fire_id %in% current_centers$fire_id))
  group_centers[[i]] = rbind(group_centers[[i]], current_centers)
  group_centers[[i]] = arrange(group_centers[[i]], fire_id)
  
  # assign the fire id back to master data set
  hotspots_VIC$fire_id[match(current_hour$obs_id, hotspots_VIC$obs_id)] = current_hour$fire_id
    
}
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:igraph':
## 
##     groups
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
test = filter(hotspots_VIC, hour_id <= 200)
temp = test %>%
  group_by(fire_id) %>%
  summarise(start_date = min(hour_id), end_date = max(hour_id))
coo = st_coordinates(test)
test$lon = coo[,1]
test$lat = coo[,2]
test = test %>%
  mutate(start_date = temp$start_date[fire_id], end_date = temp$end_date[fire_id])
p <- ggplot() +
  geom_point(data = test, aes(x = lon, y=lat, group = factor(fire_id), col = factor(fire_id), text1 = start_date, text2 = end_date)) +
  theme_bw() +
  theme(legend.position = "none")
## Warning: Ignoring unknown aesthetics: text1, text2
gg <- ggplotly(p, tooltip=c('group',"text1","text2"))

gg
library(gganimate)
myfun = function(x,y){
  temp = as.data.frame(st_coordinates(x)) %>%
    mutate(hour_id = y)
  temp = temp %>%
    mutate(fire_id = 1:nrow(temp))
  temp
}

center_move = imap(group_centers, ~myfun(.x,.y)) %>%
  reduce(rbind)

a <- filter(center_move, fire_id == 66) %>%
 ggplot()+
  geom_point(aes(x = X,y = Y), col = 'red', size = 5) +
  theme_bw()+
  theme(legend.position = "none") +
  transition_states(hour_id,
                    transition_length = 2,
                    state_length = 1)+
  shadow_mark(color = 'black', size = 2, past = TRUE, future = FALSE)

a